import csv
from os import read
from nltk import FreqDist
from nltk.util import pr
from operator import itemgetter


def get_word_frequency(data_path, name_row, min_word_length=3):
    """Print word frequencies for one column of a CSV file.

    Each cell in the selected column is split on underscores, the resulting
    words are counted across all rows, and every word that is at least
    ``min_word_length`` characters long is printed as ``word count``, most
    frequent first. Words with equal counts are printed in the order they
    were first seen in the file.

    Args:
        data_path: Path to the CSV file. It is read as ``utf-8-sig``, so a
            leading byte-order mark is ignored.
        name_row: Zero-based index of the column whose cells are analysed
            (despite the name, this indexes into each row, i.e. a column).
        min_word_length: Minimum number of characters a word needs in order
            to be printed. Defaults to 3.

    Returns:
        None. The result is written to standard output.
    """
    # Function-scope stdlib import so the module's import header is untouched.
    from collections import Counter

    word_counts = Counter()
    # newline='' lets the csv module handle line endings itself, which keeps
    # newlines embedded in quoted fields intact.
    with open(data_path, encoding='utf-8-sig', newline='') as csvfile:
        for row in csv.reader(csvfile):
            try:
                cell = row[name_row]
            except IndexError:
                # Blank lines (read as []) and short rows lack this column.
                continue
            word_counts.update(cell.split('_'))

    # most_common() sorts by count, descending, with a stable sort, so ties
    # keep their first-seen order.
    for word, freq in word_counts.most_common():
        if len(word) >= min_word_length:
            print(word, freq)

# Word-frequency test; the data has been desensitized (anonymized).
# get_word_frequency('data/table_names/newsapp_tablename_old.csv',2)
# get_word_frequency(r'data/index_names/index_name_new.csv', 1)
